*Import BC data for eWIC analysis
*last modified: 16 February 2025
*last modified by: Kathya Tapia-Schythe

*-------------------------------------------------------------------------------
*--- Preamble
*-------------------------------------------------------------------------------

* Start from an empty session so earlier data and results cannot leak in
clear all
* Do not pause output with a more prompt
set more off
* Print the return message (with timing) after every command
set rmsg on

* Raise the variable limit; must run while no data are in memory
* NOTE(review): the permitted maximum depends on the Stata edition - confirm
set maxvar 120000
* Empty factor-variable interaction cells are dropped from estimation
set emptycells drop

*-------------------------------------------------------------------------------
*--- Directories and Log 
*-------------------------------------------------------------------------------

* Input and output locations, all relative to the current working directory
local raw_dir   "./data/raw"
local data_dir  "./data/cleaned"
local out_dir   "./analysis/output"
local graph_dir "`out_dir'/graphs"
local tab_dir   "`out_dir'/tables"
local log_dir   "./documentation/logs"

* Date stamp of the run (YY-MM-DD), used as the log-file suffix
local date : display %tdYY-NN-DD date(c(current_date), "DMY")
display "`date'"

* Close any log left open by an earlier run, then open the log for this run
capture log close
log using "`log_dir'/data_bc`date'", replace

*-------------------------------------------------------------------------------
*--- County Business Patterns with Imputation Database
*-------------------------------------------------------------------------------

* Build two county-year employment panels from the EFSY imputed CBP file:
*   allefsy_panel_naics.dta - all NAICS codes in the file
*   efsy_panel_naics.dta    - food-retail NAICS codes only
import delimited `raw_dir'/bc/efsy_panel_naics.csv, delimiter(comma) varnames(1) clear

drop v1 // presumably an unnamed index column in the CSV - TODO confirm

* Each sample has a macro holding its filter command; the macro for allefsy is
* left empty, so no filter is applied to that sample
local keep_allefsy 
* NAICS codes
/* 445110 Supermarket & grocery stores
   445120 Convenience stores
   4452// Specialty food stores - observed in 1975-1997 
          (see: https://www.census.gov/naics/?input=44512&year=2012)
   452910 Warehouses & supercenters */
local keep_efsy keep if naics12=="445110" | naics12=="445120" | naics12=="4452//" | naics12=="452910" 

drop if year<2005 // drop years before 2005 (no upper bound is applied here)

* Harmonize county 46113 to 46102 (Oglala Lakota County, SD; code changed in 2014).
* This is done BEFORE collapsing so that rows under either code fall into one
* county-year cell and fipscty stays consistent with ct_fips
replace fipscty=102 if fipstate==46 & fipscty==113

* 5-digit county FIPS: state code followed by the zero-padded 3-digit county code
gen ct_fips= real(string(fipstate) + string(fipscty,"%03.0f"))

foreach sample in allefsy efsy {

 preserve

 * Apply the filter for this sample (expands to nothing for allefsy)
 `keep_`sample''
 tab naics12, m

 misstable sum // missingness

 * County-year employment: avg_emp is the mean of emp over the NAICS rows in
 * the cell, emp is their sum
 collapse (mean) avg_emp=emp (sum) emp (mean) fipstate fipscty, by(ct_fips year)

 rename year fiscalyear
 tab fiscalyear

 * Dataset labels are limited to 80 characters, so the label is kept short and
 * the full title of the source is stored as a note
 lab data "CBP with imputed employment (Eckert, Fort, Schott, and Yang)"
 notes: Eckert, Fort, Schott, and Yang. Imputing Missing Values in the US Census Bureau's County Business Patterns.
 notes: Source https://fpeckert.me/cbp/
 save `data_dir'/`sample'_panel_naics.dta, replace 

 restore

}

*-------------------------------------------------------------------------------
*--- CBP U.S. Census Bureau
*-------------------------------------------------------------------------------

* Build a county-year panel of food-retail employment and establishments from
* the annual CBP county files cbpYYco.txt, YY = 05 to 18
local years 05 06 07 08 09 10 11 12 13 14 15 16 17 18

/* 445110 Supermarket & grocery stores
   445120 Convenience stores
   4452// Specialty food stores - observed in 1975-1997 
          (see: https://www.census.gov/naics/?input=44512&year=2012)
   452910 Warehouses & supercenters
   NOTE(review): the 2017 and 2018 CBP files may be published on a NAICS 2017
   basis, where warehouse clubs & supercenters are coded 452311 instead of
   452910 - confirm whether those two years need the newer code */
foreach y of local years {
 tempfile cbp`y'
 import delimited `raw_dir'/bc/cbp`y'co.txt, delimiter(comma) varnames(1) clear
 keep fipstate fipscty naics emp est
 rename est estab

 * Filter each year before saving so the full county files are never appended
 keep if naics=="445110" | naics=="445120" | naics=="4452//" | ///
         naics=="452910" 

 gen fiscalyear=`y'+2000 // two-digit file year to four-digit year
 save `cbp`y'', replace
}

* Stack the yearly extracts
clear
foreach y of local years {
 append using `cbp`y''
}

tab naics

* Harmonize county 46113 to 46102 (Oglala Lakota County, SD; code changed in 2014).
* This is done BEFORE collapsing so that rows under either code fall into one
* county-year cell and fipscty stays consistent with ct_fips
replace fipscty=102 if fipstate==46 & fipscty==113

* 5-digit county FIPS: state code followed by the zero-padded 3-digit county code
gen ct_fips= real(string(fipstate) + string(fipscty,"%03.0f"))

misstable sum // missingness

* County-year employment & establishments. The establishment count was renamed
* to estab above, so it is referenced by its full name here
collapse (mean) avg_emp=emp (sum) emp estab (mean) fipstate fipscty, by(ct_fips fiscalyear)

tab fiscalyear

lab data "County Business Patterns, U.S. Census Bureau"
notes: Source https://www.census.gov/programs-surveys/cbp/data/datasets.html
save `data_dir'/cbp_panel_naics.dta, replace 

*-------------------------------------------------------------------------------
*--- County Population +15 NIH
*-------------------------------------------------------------------------------

/*Age-groups: 0 -> 0     Years		10 -> 45-49 Years
              1 -> 1-4   Years		11 -> 50-54 Years
              2 -> 5-9   Years		12 -> 55-59 Years
              3 -> 10-14 Years		13 -> 60-64 Years
              4 -> 15-19 Years		14 -> 65-69 Years
              5 -> 20-24 Years		15 -> 70-74 Years
              6 -> 25-29 Years		16 -> 75-79 Years
              7 -> 30-34 Years		17 -> 80-84 Years
              8 -> 35-39 Years		18 -> 85+   Years
              9 -> 40-44 Years*/

* Build county-year population aged 15 and over from the state-level
* fixed-width population files, one file per state abbreviation below
local states ak al ar az ca co ct dc de fl ga hi ia id il in ks ky la ma md ///
             me mi mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa ri sc sd ///
             tn tx ut va vt wa wi wv wy 

foreach st of local states {
 tempfile pop`st'

 * Fixed-width layout: year, state abbreviation, state and county FIPS,
 * registry, race, origin, sex, age group, population count
 infix fiscalyear 1-4 str state 5-6 fipstate 7-8 fipscty 9-11 registry 12-13 ///
       race 14 origin 15 sex 16 age_group 17-18  pop 19-26 ///
       using `raw_dir'/bc/`st'.1969_2022.19ages.txt, clear

 * Keep ages 15+ only (age groups 0-3 cover ages 0-14); filtering per state
 * keeps the appended data smaller
 drop if age_group==0 | age_group==1 | age_group==2 | age_group==3

 save `pop`st'', replace
}

* Stack all states
clear 
foreach st of local states {
 append using `pop`st''
}

* Harmonize county 46113 to 46102 (Oglala Lakota County, SD; code changed in 2014).
* This is done BEFORE collapsing so that rows under either code fall into one
* county-year cell and fipscty stays consistent with ct_fips
replace fipscty=102 if fipstate==46 & fipscty==113

* 5-digit county FIPS: state code followed by the zero-padded 3-digit county code
gen ct_fips= real(string(fipstate) + string(fipscty,"%03.0f"))

* Sum the remaining cells to one population count per county-year
collapse (mean) fipstate fipscty (sum) pop, by(ct_fips fiscalyear)

lab var pop "Population +15"

lab data "U.S. County Population Data, NIH"
notes: Source https://seer.cancer.gov/popdata/download.html#19
save `data_dir'/population_15plus.dta, replace


* Close the log opened at the top of the script
log close
